*******************************************************************************************************************************
/* Create replication datsets for "Distinguishing barriers to insurance in Thai villages"*/
* Run this do file before generating Tables from Replication.do
*******************************************************************************************************************************

*Set paths and globals 
clear
global root = "/Users//`=c(username)'/dropbox/Insurance/JHR/Tables/"
cd "$root"

****************************************************************************************************
*DATA FOR TABLE 1
****************************************************************************************************
*-------------------------------------------------------------------------------
*YEARLY RAINFALL DATA:
*-------------------------------------------------------------------------------
*Import Annual Data Set
use rain_ivs_annual.dta, clear

*Merge in Annual household consumption and income data
merge 1:1 id year using monthly_annualized_data531.dta
drop _merge

*Merge in Household data with household size
merge m:1 id using demog_baseline.dta
drop if _merge==1 | _merge==2
drop _merge

sort id year
tsset id year
	
egen vill_yr = group(vill year)

*normalize income and expenditure by household members	
gen income_pce = income/adj_total_t
gen exp_pce = exp/adj_total_t

*capital letters denote total, while the others denote PCE:
gen Inc_ihs=log(income + sqrt(income*income+1))
gen Exp_ihs=log(exp+sqrt(exp*exp+1))

gen inc_ihs=log(income_pce + sqrt(income_pce*income_pce+1))
gen exp_ihs=log(exp_pce+sqrt(exp_pce*exp_pce+1))
	
*Demean variables:
bysort id: egen aux=mean(lnexp)
gen dm_lnexp= lnexp-aux
drop aux

bysort id: egen aux=mean(exp_ihs)
gen dm_exp_ihs= exp_ihs-aux
drop aux

bysort id: egen aux=mean(inc_ihs)
gen dm_inc_ihs= inc_ihs-aux
drop aux

bysort id: egen aux=mean(lninc)
gen dm_lninc= lninc-aux
drop aux

foreach qr in 1 2 3 4{
bysort id: egen aux=mean(rain_dev`qr')
gen dm_rain_dev`qr'= rain_dev`qr'-aux
drop aux

bysort id: egen aux=mean(rain_dev`qr'_2)
gen dm_rain_dev`qr'_2= rain_dev`qr'_2-aux
drop aux

		foreach oc in 7 9 11 13 15 19 23 31 61 65 {
		bysort id: egen aux=mean(rain_dev`qr'X`oc')
		gen dm_rain_dev`qr'X`oc'= rain_dev`qr'X`oc'-aux
		drop aux

		bysort id: egen aux=mean(rain_dev`qr'_2X`oc')
		gen dm_rain_dev`qr'_2X`oc'= rain_dev`qr'_2X`oc'-aux
		drop aux
		}	
}

** Save Annual Data Set at the Household Level
save annual.dta , replace

*Results aggregated at the village level:
collapse (mean) lnexp lninc rain_dev1 rain_dev2 rain_dev3 rain_dev4 rain_dev1_2 rain_dev2_2 rain_dev3_2 rain_dev4_2 vill_yr , by(vill year)

sort vill year

**Demean variables:
bysort vill: egen aux=mean(lnexp)
gen lnexp_v = lnexp-aux
drop aux

bysort vill: egen aux=mean(lninc)
gen lninc_v = lninc-aux
drop aux

foreach qr in 1 2 3 4{
bysort vill: egen aux=mean(rain_dev`qr')
gen rain_dev`qr'_v = rain_dev`qr'-aux
drop aux

bysort vill: egen aux=mean(rain_dev`qr'_2)
gen rain_dev`qr'_2_v = rain_dev`qr'_2-aux
drop aux
}

** Save Annual Data Set Collapsed at the Village Level **
save annual_vill.dta , replace

*-------------------------------------------------------------------------------
*OCCUPATIONAL DATA:
*-------------------------------------------------------------------------------
* Import annualized occupation data to generate instruments
use monthly_annualized_data531.dta, clear

keep id year occupation2
sort id year

collapse (firstnm) occupation2, by(id)

gen vill=floor(id/100)

save occupation.dta, replace

*-------------------------------------------------------------------------------
*MONTHLY EXPENDITURE AND INCOME DATA:
*-------------------------------------------------------------------------------
*we use the stay data:
use aggregate_stay.dta, clear

*Merge in Household data with household size
merge m:1 id using demog_baseline.dta
drop if _merge==1 | _merge==2
drop _merge

*we create variables:
drop if month<5
	
gen income=agg_IS_28 
gen exp=agg_IS_29 
	
gen income_pce = income/adj_total_t
gen exp_pce = exp/adj_total_t

sort id mo 
tsset id mo
	
*capital letters denote total, while the others denote PCE (personal consumption expenditures):
gen lnInc=ln(income)
gen lnExp=ln(exp)
gen Inc_ihs=log(income + sqrt(income*income+1)) //IHS denote inverse hyperbolic sine
gen Exp_ihs=log(exp+sqrt(exp*exp+1))
	
gen lninc=ln(income_pce)
gen lnexp=ln(exp_pce)
gen inc_ihs=log(income_pce + sqrt(income_pce*income_pce+1))
gen exp_ihs=log(exp_pce+sqrt(exp_pce*exp_pce+1))

*vill-month, vill-quarter and vill-year effects
gen vill=floor(id/100)
	
egen vill_mo=group(vill mo)
gen year=0
forvalues i=1/7{
replace year=`i' if month>4+12*(`i'-1) & month<5+12*`i'
}

egen vill_yr = group(vill year)
	
gen quarter=0
forvalues i=1/28{
replace quarter=`i' if month>4+3*(`i'-1) & month<5+3*`i'
}
egen vill_qr=group(vill quarter)
	
*we merge with rainfall:
merge m:1 vill month using rain_ivs_monthly.dta
drop _merge
drop month2
	
*we merge with occupation:
merge m:1 id using occupation.dta
drop _merge
		
*Interact the Rain shock with Occupations
foreach oc in 7 9 11 13 15 19 23 31 61 65 {
	gen rain_devX`oc' =.
	gen sq_rain_devX`oc' =.
	replace rain_devX`oc' = rain_dev if occupation2==`oc'
	replace rain_devX`oc' = 0 if occupation2 !=`oc' & occupation2 !=. & rain_dev !=.
	replace sq_rain_devX`oc' = sq_rain_dev if occupation2==`oc'
	replace sq_rain_devX`oc' = 0 if occupation2 !=`oc' & occupation2 !=. & sq_rain_dev !=.
}

sort id mo
*create demeaned variables (all per capita):
*Log exp
bysort id: egen aux=mean(lnexp)
gen dm_lnexp= lnexp-aux
la var dm_lnexp "Log exp."
drop aux

*Log income
bysort id: egen aux=mean(lninc)
gen dm_lninc= lninc-aux
la var dm_lninc "Log income"
drop aux

*IHS exp
bysort id: egen aux=mean(exp_ihs)
gen dm_exp_ihs= exp_ihs-aux
la var dm_exp_ihs "IHS exp."
drop aux

*IHS income
bysort id: egen aux=mean(inc_ihs)
gen dm_inc_ihs= inc_ihs-aux
la var dm_inc_ihs "IHS income"
drop aux

*Rain
bysort id: egen aux=mean(rain_dev)
gen dm_rain_dev= rain_dev-aux
drop aux

bysort id: egen aux=mean(sq_rain_dev)
gen dm_sq_rain_dev= sq_rain_dev-aux
drop aux

*Rain-occupation interactions
foreach oc in 7 9 11 13 15 19 23 31 61 65 {
	bysort id: egen aux=mean(rain_devX`oc')
	gen dm_rain_devX`oc'= rain_devX`oc'-aux
	drop aux

	bysort id: egen aux=mean(sq_rain_devX`oc')
	gen dm_sq_rain_devX`oc'= sq_rain_devX`oc'-aux
	drop aux
}


local dm_rainfall "dm_rain_dev dm_sq_rain_dev dm_rain_devX7 dm_sq_rain_devX7 dm_rain_devX9 dm_sq_rain_devX9 dm_rain_devX11 dm_sq_rain_devX11 dm_rain_devX13 dm_sq_rain_devX13 dm_rain_devX15 dm_sq_rain_devX15 dm_rain_devX19 dm_sq_rain_devX19 dm_rain_devX23 dm_sq_rain_devX23 dm_rain_devX31 dm_sq_rain_devX31 dm_rain_devX61 dm_sq_rain_devX61 dm_rain_devX65 dm_sq_rain_devX65"
local rainfall "rain_dev sq_rain_dev rain_devX7 sq_rain_devX7 rain_devX9 sq_rain_devX9 rain_devX11 sq_rain_devX11 rain_devX13 sq_rain_devX13 rain_devX15 sq_rain_devX15 rain_devX19 sq_rain_devX19 rain_devX23 sq_rain_devX23 rain_devX31 sq_rain_devX31 rain_devX61 sq_rain_devX61 rain_devX65 sq_rain_devX65"

*lagged rainfall shocks:
gen dm_rain_dev_l1 = l.dm_rain_dev
gen dm_rain_dev_l2 = l.dm_rain_dev_l1
gen dm_rain_dev_l3 = l.dm_rain_dev_l2
gen dm_rain_dev_l4 = l.dm_rain_dev_l3
gen dm_rain_dev_l5 = l.dm_rain_dev_l4

gen dm_sq_rain_dev_l1 = l.dm_sq_rain_dev
gen dm_sq_rain_dev_l2 = l.dm_sq_rain_dev_l1
gen dm_sq_rain_dev_l3 = l.dm_sq_rain_dev_l2
gen dm_sq_rain_dev_l4 = l.dm_sq_rain_dev_l3
gen dm_sq_rain_dev_l5 = l.dm_sq_rain_dev_l4

gen rain_dev_l1 = l.rain_dev
gen rain_dev_l2 = l.rain_dev_l1
gen rain_dev_l3 = l.rain_dev_l2
gen rain_dev_l4 = l.rain_dev_l3
gen rain_dev_l5 = l.rain_dev_l4

gen sq_rain_dev_l1 = l.sq_rain_dev
gen sq_rain_dev_l2 = l.sq_rain_dev_l1
gen sq_rain_dev_l3 = l.sq_rain_dev_l2
gen sq_rain_dev_l4 = l.sq_rain_dev_l3
gen sq_rain_dev_l5 = l.sq_rain_dev_l4

foreach oc in 7 9 11 13 15 19 23 31 61 65{
	gen rain_devX`oc'_l1 = l.rain_devX`oc'
	gen rain_devX`oc'_l2 = l.rain_devX`oc'_l1
	gen rain_devX`oc'_l3 = l.rain_devX`oc'_l2
	gen rain_devX`oc'_l4 = l.rain_devX`oc'_l3
	gen rain_devX`oc'_l5 = l.rain_devX`oc'_l4

	gen sq_rain_devX`oc'_l1 = l.sq_rain_devX`oc'
	gen sq_rain_devX`oc'_l2 = l.sq_rain_devX`oc'_l1
	gen sq_rain_devX`oc'_l3 = l.sq_rain_devX`oc'_l2
	gen sq_rain_devX`oc'_l4 = l.sq_rain_devX`oc'_l3
	gen sq_rain_devX`oc'_l5 = l.sq_rain_devX`oc'_l4

	gen dm_rain_devX`oc'_l1 = l.dm_rain_devX`oc'
	gen dm_rain_devX`oc'_l2 = l.dm_rain_devX`oc'_l1
	gen dm_rain_devX`oc'_l3 = l.dm_rain_devX`oc'_l2
	gen dm_rain_devX`oc'_l4 = l.dm_rain_devX`oc'_l3
	gen dm_rain_devX`oc'_l5 = l.dm_rain_devX`oc'_l4

	gen dm_sq_rain_devX`oc'_l1 = l.dm_sq_rain_devX`oc'
	gen dm_sq_rain_devX`oc'_l2 = l.dm_sq_rain_devX`oc'_l1
	gen dm_sq_rain_devX`oc'_l3 = l.dm_sq_rain_devX`oc'_l2
	gen dm_sq_rain_devX`oc'_l4 = l.dm_sq_rain_devX`oc'_l3
	gen dm_sq_rain_devX`oc'_l5 = l.dm_sq_rain_devX`oc'_l4
}


* by high and low R2 values, rainfall's predictive power of income
gen highR2 = .
replace highR2 = 1 if occupation2 == 7 | occupation2 == 23 | occupation2 == 11 | occupation2 == 15
replace highR2 = 0 if missing(highR2)

gen lowR2 = .
replace lowR2 = 1 if occupation2 == 61 | occupation2 == 19 | occupation2 == 13 | occupation2 == 31 | occupation2 == 65 | occupation2 == 9
replace lowR2 = 0 if missing(lowR2)

** Save Monthly Data set with income and expenditure and rainfall shocks
save monthly.dta ,replace

*Results aggregated at the village level:
collapse (mean) lnexp inc_ihs rain_dev sq_rain_dev rain_dev_l2 sq_rain_dev_l2 rain_dev_l3 sq_rain_dev_l3 rain_dev_l4 sq_rain_dev_l4 rain_dev_l5 sq_rain_dev_l5 vill_qr , by(vill month)
sort vill month

*IV using rainfall:
bysort vill: egen aux=mean(lnexp)
gen lnexp_v = lnexp-aux
drop aux

bysort vill: egen aux=mean(inc_ihs)
gen inc_ihs_v = inc_ihs-aux
drop aux

bysort vill: egen aux=mean(rain_dev)
gen rain_dev_v = rain_dev-aux
drop aux

bysort vill: egen aux=mean(sq_rain_dev)
gen sq_rain_dev_v = sq_rain_dev-aux
drop aux

foreach lag in l2 l3 l4 l5{
bysort vill: egen aux=mean(rain_dev_`lag')
gen rain_dev_`lag'_v = rain_dev_`lag'-aux
drop aux

bysort vill: egen aux=mean(sq_rain_dev_`lag')
gen sq_rain_dev_`lag'_v = sq_rain_dev_`lag'-aux
drop aux
}

** Save Monthly Data set with income and expenditure and rainfall shocks at the Village Level
save monthly_vill.dta ,replace
	
****************************************************************************************************
*DATA FOR TABLE 2 AND TABLE 3
****************************************************************************************************	
mat A = J(10,15,.)
	
*we use the stay data:
use aggregate_stay.dta ,clear

*we create variables:
drop if month<5
	
gen income=agg_IS_28 
gen exp=agg_IS_29 

sort id mo 
tsset id mo

gen lnInc=ln(income)
gen lnExp=ln(exp)

gen lag_Exp=l.exp
la var lag_Exp "Lagged consumption"

gen l_lninc=l.lnInc


*vill-month, vill-quarter and vill-year effects
gen vill=floor(id/100)
egen vill_mo=group(vill mo)
	
gen year=0
forvalues i=1/7{
replace year=`i' if month>4+12*(`i'-1) & month<5+12*`i'
}
egen vill_yr = group(vill year)
	
gen quarter=0
forvalues i=1/28{
replace quarter=`i' if month>4+3*(`i'-1) & month<5+3*`i'
}
egen vill_qr=group(vill quarter)
	
*generate lagged income and inverse sine transf:
gen l_income=l.income
gen l_inc_ihs=log(l_income + sqrt(l_income*l_income+1))
gen l_inc_ihs_var=log(l_income + sqrt(l_income+1))
la var l_inc_ihs "Lagged IHS income"

	
*genereate inverse sine transformation of current income
gen inc_ihs=log(income + sqrt(income*income+1))

*generate eta (within-village equivalent to eta) eta*lag of consumption
bysort vill_mo: egen auxiliar_0=sum(exp)
gen auxiliar_1=1
bysort vill_mo: egen auxiliar_2=sum(auxiliar_1)
gen auxiliar_5=auxiliar_0-exp
gen auxiliar_6=auxiliar_2-1
gen eta=auxiliar_5/auxiliar_6
drop auxiliar_*
gen exp_eta = lag_Exp*eta
		
*quadratic splines:
bspline, xvar(lag_Exp) power(2) generate(lag_expq)
bspline, xvar(eta) power(2) generate(etaq)
bspline, xvar(exp_eta) power(2) generate(exp_etaq)

*quadratic splines with flexcurv:
gen lag_exp_eta=lag_Exp*eta
flexcurv, xvar(lag_Exp) power(2) generate(lag_expf) refpts(183.6667 5340.7918 313112)
flexcurv, xvar(eta) power(2) generate(etaf) refpts(1475.881 5347.684  21954.77)
flexcurv, xvar(exp_eta) power(2) generate(exp_etaf) refpts(545449.9  3.21e+07  3.01e+09)

sort id

merge m:1 id using "rain and income vars_v3.dta"

gen r2_l_inc_ihs = l_inc_ihs*r2_rain
la var r2_rain "Rainfall  Rsq"
la var r2_l_inc_ihs "Rainfall Rsq X lagged IHS income"

save lagged_aggregate_stay.dta ,replace


****************************************************************************************************
*DATA FOR TABLES 4, B1 and C2
****************************************************************************************************
*Import annualized data:
use monthly_annualized_data531.dta, clear



foreach z in vil fgn {
la var  gm1c_`z' "gifts given to orgs"
la var  gm2c_`z' "gifts given for events"
la var  gm4c_`z' "gifts recd from orgs"
la var  gm5c_`z' "gifts recd for events"
}

la var  gm3a3 "gifts given to people in vill"
la var  gm3b3 "gifts given to people outside vill"
la var  gm6a3 "gifts recd from people in vill"
la var  gm6b3 "gifts recd from people outside vill"

gen lnexp=ln(exp_pc)

//Real 2002 baht
tokenize 97.7 97.8 99.2 100 101.9 106.7 113.5 

gen incomeN=income
la var incomeN "Nominal income"

gen expN=exp
la var expN "Nominal expenditure"

gen exp_pcN=exp_pc
la var exp_pcN "Nominal PC expenditure"

forvalues y=1999/2005 {
	disp `y'
	disp `1'
	foreach var in  income exp exp_pc {
		replace `var'=100*`var'/`1' if year==`y'
	}
	macro shift
}

la var income "Real income"

la var exp "Real expenditure"

la var exp_pc "Real PC expenditure"

gen inc_pc=income/adj_total_t

la var inc_pc "Real PC income"


bysort year: egen highexp95=pctile(exp_pc), p(95)
bysort year: egen lowexp05=pctile(exp_pc), p(5)

bysort year: egen highexp90=pctile(exp_pc), p(90)
bysort year: egen lowexp10=pctile(exp_pc), p(10)

**Village Level IV
xtset vill

*Take out Village-yr component
qui xi: reg lnexp  i.vill_year
predict lnexp_, r

qui xi: reg lninc  i.vill_year
predict lninc_, r

*Take out Individual component
qui xi: reg lnexp  i.id
predict lnexp_2, r

qui xi: reg lninc  i.id
predict lninc_2, r

sort id year
tsset id year

gen l_lninc_=l.lninc_
gen l_lninc_2=l.lninc_2

capture drop rank
bysort vill year: egen rank=rank(exp_pc)

gen earned_inc1= agg_IS_07
egen costs=rowtotal(agg_IS_08 agg_IS_09 agg_IS_10 agg_IS_11 agg_IS_12 agg_IS_13 agg_IS_14)
gen earned_inc2= agg_IS_07-costs

gen remit_inc1=gm6b3
gen remit_inc2=gm6b3- gm3b3

gen laborinc1= agg_IS_05
gen laborinc2= agg_IS_05-agg_IS_14

gen bizinc1= agg_IS_04
gen bizinc2= agg_IS_04-agg_IS_13

gen aginc1=  agg_IS_01+ agg_IS_02+ agg_IS_03
gen aginc2= agg_IS_01+ agg_IS_02+ agg_IS_03- agg_IS_08- agg_IS_09-agg_IS_12

gen nonlaborinc1= earned_inc1-laborinc1
gen nonlaborinc2= earned_inc2-laborinc2

gen cult_inc1=  agg_IS_01
gen cult_inc2= agg_IS_01- agg_IS_08

gen live_inc1=  agg_IS_02
gen live_inc2= agg_IS_02- agg_IS_09

gen fish_inc1=  agg_IS_03
gen fish_inc2= agg_IS_03-agg_IS_12

foreach var in 	earned_inc1 earned_inc2 remit_inc1 remit_inc2  laborinc1 laborinc2 bizinc1 bizinc2 aginc1 aginc2 ///
cult_inc1 cult_inc2 live_inc1 live_inc2 fish_inc1 fish_inc2 {
				gen `var'_u=`var'
				replace `var'=.00001*`var'
			}

cap drop rank
bysort vill year: egen rank=rank(exp_pc) if year==1999
bysort id: egen rank99=max(rank)
drop rank


*** Data for testing Amnesia
tsset id year
*Generate lnexp_pc is the log of per capita expenditure, that is, current inverse marginal utility
gen imu=lnexp_pc 
*Generate lagged log per capita expenditure, that is it is proportional to lagged inverse marginal utility under CRRA utility 
gen imu_old=l.lnexp_pc 	
*Generate the change in inverse marginal utility between t-1 and t				
gen del_gamma=imu/imu_old 
sum del

*Calculate quartiles of consumption growth relative to that year and village
bysort vill year: egen r0=min(del_gamma)
bysort vill year: egen r25=pctile(del_gamma), p(25)
bysort vill year: egen r50=pctile(del_gamma), p(50)
bysort vill year: egen r75=pctile(del_gamma), p(75)

*Create Indicator variables for falling into a given quartile
gen p75=(del_gamma>=r75&del_gamma!=.)
gen p50=(del_gamma>=r50&del_gamma<r75)
gen p25=(del_gamma>=r25&del_gamma<r50)
gen p0=(del_gamma<r25)

*Set quartile indicator variables to missing if consumption growth is missing, and interact the quartiles with lagged inverse marginal utility
foreach v in 0 25 50 75 {
replace p`v'=. if del_gamma==.
gen limuX`v'=imu_old*p`v'

gen lnincX`v'=lninc*p`v'
}

save monthly_annualized_data531_new.dta, replace
